# Clears work environment
rm(list = ls())

# Defines Shahryar's package loader
loadPkg=function(toLoad){
  for(lib in toLoad){
    if(! lib %in% installed.packages()[,1])
    { install.packages(lib, repos='http://cran.rstudio.com/') }
    suppressMessages( library(lib, character.only=TRUE) ) }
}

# Loads libraries
packs=c('ggplot2', 'gridExtra', 'foreign', 'lmtest', 'countrycode', 'apsrtable', 'stargazer', 'pscl', 'ROCR', 'R.utils', 'reshape2')
loadPkg(packs)

# Sets working directory, loads Lai & Slater data, and loads Banks regime codes for merging
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')
ls = read.dta('ajpsreplicate.dta')

# CORRECTION: Removes twenty duplicate observations of Cyprus
ls = ls[which(duplicated(ls)==F),]

# CORRECTION: Swaps COW codes for Guinea and Guinea-Bissau
should.be.guinea = which(ls$ccode==404)
should.be.guinea.bissau = which(ls$ccode==438)
ls$ccode[should.be.guinea] = 438
ls$ccode[should.be.guinea.bissau] = 404

# lists IVs, subsets out NAs, creates data frame, and writes formula
dv = 'majorinitone'
DV = 'Major MID'
ivs = c('machine', 'junta', 'boss', 'strongman', 'total', 'cap', 'open', 'totalally', 
        'majorinitonelag')
IVs = c('Machine', 'Junta', 'Boss', 'Strongman', 'Total Borders', 'Capability', 'Openness', 
        'Total Allies', 'Lag DV')
lsm = na.omit(ls[,c('ccode', 'year', 'majorinitone', 'democracy', ivs)])
form = formula(paste0(dv, ' ~ ', paste(ivs, collapse=' + ')))

# Checks that all country-years are coded with one and only one regime type
lsm.check = NULL
for(i in 1:nrow(lsm)){
  lsm.check[i] = sum(lsm[i,4:8])
}
table(lsm.check)

# Lists all countries with zero observed conflict initiations
setdiff(countrycode(unique(lsm$ccode), 'cown', 'country.name'), 
        countrycode(unique(lsm$ccode[which(lsm$majorinitone==1)]), 'cown', 'country.name'))

# Prints table displaying percentage of country-year observations 
# with conflict initiations by democracy and non-democracy
table(lsm$democracy[which(lsm$majorinitone==1)]) / 
  table(lsm$democracy[which(lsm$majorinitone==0)])

# Prints table displaying percentage of non-initiator country-year
# observations by democracy and non-democracy
table(lsm$democracy[which(lsm$ccode %in% setdiff(lsm$ccode, lsm$ccode[which(lsm$majorinitone==1)]))])

# Adds country name for convenience
lsm$state = countrycode(lsm$ccode, 'cown', 'country.name')

# Adds regime type as categorical variable for ggplotting
lsm$regime = 'democracy'
lsm$regime[which(lsm$machine==1)] = 'machine'
lsm$regime[which(lsm$junta==1)] = 'junta'
lsm$regime[which(lsm$boss==1)] = 'boss'
lsm$regime[which(lsm$strongman==1)] = 'strongman'
lsm$regime = factor(lsm$regime, levels=c('democracy', 'machine', 'junta', 'boss', 'strongman'))

# Runs negative binomial and zero-inflated negative binomial models
negbinMod = glm.nb(form, data=lsm, control=glm.control(maxit=100))
znegbinMod = zeroinfl(form, data=lsm, dist='negbin')

# calls function authored by Mahmood Ara to calculate clustered standard errors on negative binomial model
source(paste0(getwd(), '/cse.R'))
negbin.cse = cse(lsm, negbinMod, lsm$ccode)

# calls function authored by David Huh to calculate clustered standard errors on zero-inflated model
source(paste0(getwd(), '/clrobustse.R'))
znegbin.cse = clrobustse(znegbinMod, lsm$ccode)

# Runs models on uncorrected data
lso = na.omit(ls[,c('ccode', 'year', 'majorinitone', 'democracy', ivs)])
negbinModo = glm.nb(form, data=lso, control=glm.control(maxit=100))
znegbinModo = zeroinfl(form, data=lso, dist='negbin')
onegbin.cse = cse(lso, negbinModo, lso$ccode)
oznegbin.cse = clrobustse(znegbinModo, lso$ccode)

# Calculates clustered errors on original model
negbinModo$se = c(onegbin.cse[,2], negbinModo$SE.theta)
names(negbinModo$se) = c(rownames(onegbin.cse), 'SE.theta')
# Calculates clustered errors  corrected model
negbinMod$se = c(negbin.cse[,2], negbinMod$SE.theta)
names(negbinMod$se) = c(rownames(negbin.cse), 'SE.theta')

# Checks difference between two models
(onegbin.cse[,1] - negbin.cse[,1]) / onegbin.cse[,1]
(onegbin.cse[,2] - negbin.cse[,2]) / onegbin.cse[,2]
(onegbin.cse[,4] - negbin.cse[,4]) / onegbin.cse[,4]

# Makes 1000 draws from multivariate normal distribution with mean=betas and sd=clustered standard errors
set.seed(6886)
sims = 1000
cimaker = function(x) {c(sort(x)[sims * .025], sort(x)[sims * .05], mean(x), sort(x)[sims * .95], sort(x)[sims * .975])}
source(paste0(getwd(), '/sndwch.R'))
negbinvcov = sndwch(model=negbinMod, family='Negative Binomial', rank=negbinMod$rank, id=lsm$ccode, small.n=F)
negbdraws = mvrnorm(sims, coef(negbinMod), negbinvcov)
zinbdraws = mvrnorm(sims, coef(znegbinMod), znegbin.cse$vcov)

# Defines function to run simulations with Lai and Slater's negative binomial model
LSpredictor = function(x) {
  X = data.matrix(cbind(1, x[,ivs]))
  b = coef(negbinMod)
  y = exp(X %*% b)
  return(y)
}

# Defines function to run simulations with zero-inflated negative binomial model
JTpredictor = function(x) {
  X = data.matrix(cbind(1, x[,ivs]))
  bz = coef(znegbinMod)[11:20]
  bc = coef(znegbinMod)[1:10]
  z = 1 / (1 + exp(-(X %*% bz)))
  c = exp(X %*% bc)
  y = (1 - z) * c
  return(y)
}


save(file='cleanedLSdata.RData', lsm, DV, IVs, dv, ivs, form, packs, loadPkg, negbinMod, znegbinMod, 
     negbin.cse, znegbin.cse, sims, cimaker, negbdraws, zinbdraws, LSpredictor, JTpredictor)